package eshore.cn.it.solrhandler.hand;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrServerException;
import org.springframework.beans.factory.annotation.Required;

import com.csvreader.CsvReader;

/**
 * Index handler that reads records from a delimited text (CSV) file and
 * submits them for indexing in batches through {@code doIndexTask}.
 *
 * <p>Typical use: configure the properties (at least {@code fileName} and
 * {@code haveHeader}) and call {@link #doFileIndex()}.
 */
public class FileIndexHandler extends IndexHandler{
	/** Reader over the file being indexed; created by {@link #initIndexHandler()}. */
	private CsvReader csvReader;

	/**
	 * Zero-based index of the first data record to index; earlier records are
	 * read and discarded. When {@code haveHeader} is true the header line is
	 * consumed separately and is not counted here. When {@code haveHeader} is
	 * false but the file still starts with a title line, set this to 1 to skip it.
	 */
	private int begin = 0;
	public int getBegin() {
		return begin;
	}
	public void setBegin(int begin) {
		this.begin = begin;
	}

	/**
	 * Number of records submitted per batch. Values below 1 are treated as 1
	 * by {@link #doFileIndex()}.
	 */
	private int commitNumber = 2000;
	public int getCommitNumber() {
		return commitNumber;
	}
	public void setCommitNumber(int commitNumber) {
		this.commitNumber = commitNumber;
	}
	
	/**
	 * Character encoding of the file; defaults to UTF-8.
	 */
	private String fileEncoding = "UTF-8";
	public String getFileEncoding() {
		return fileEncoding;
	}
	public void setFileEncoding(String fileEncoding) {
		this.fileEncoding = fileEncoding;
	}
	
	/**
	 * Name of the file to index.
	 */
	private String fileName;
	
	public String getFileName() {
		return fileName;
	}
	@Required
	public void setFileName(String fileName) {
		this.fileName = fileName;
	}

	/**
	 * Delimiter character that separates the columns of each line.
	 */
	private char lineSeparate = ',';
	
	public char getLineSeparate() {
		return lineSeparate;
	}
	public void setLineSeparate(char lineSeparate) {
		this.lineSeparate = lineSeparate;
	}

	/**
	 * Text qualifier character that surrounds column values.
	 */
	private char colSurround = '"';
	public char getColSurround() {
		return colSurround;
	}
	public void setColSurround(char colSurround) {
		this.colSurround = colSurround;
	}

	/**
	 * Whether the first line of the file is a header line. This property has
	 * to be configured explicitly (its setter is {@code @Required}).
	 * If the file has no header, the column names must be configured by hand
	 * through the {@code colNames} property, one name per column, in order.
	 */
	private boolean haveHeader;
	
	public boolean isHaveHeader() {
		return haveHeader;
	}
	@Required
	public void setHaveHeader(boolean haveHeader) {
		this.haveHeader = haveHeader;
	}

	
	/**
	 * Reads the configured file and submits its records for indexing in
	 * batches of {@code commitNumber} records.
	 *
	 * <p>Read and submit failures are reported with a stack trace and do not
	 * propagate. A batch whose submission failed is kept and retried together
	 * with the following records each time another full batch has accumulated,
	 * and once more at the end of the file. The completion message is printed
	 * only if the file was read without error and no records remain
	 * unsubmitted. The reader is always closed before this method returns.
	 */
	public void doFileIndex() {
		try {
			// Open the file and resolve the column indexes.
			initIndexHandler();

			// Guard against a non-positive batch size (negative would make
			// the ArrayList constructor throw, zero would never flush).
			final int batchSize = Math.max(1, this.getCommitNumber());
			final int firstRecord = this.getBegin();
			List<String[]> datas = new ArrayList<String[]>(batchSize);
			// Number of records read from the file so far, skipped ones included.
			int mark = 0;
			boolean readOk = true;

			try {
				while (this.csvReader.readRecord()) {
					boolean skipped = mark < firstRecord;
					mark++;
					if (skipped) {
						continue;
					}
					datas.add(this.csvReader.getValues());
					// Flush on every multiple of the batch size rather than on
					// exact equality: after a failed submit the list is larger
					// than one batch and an equality test would never fire again.
					if (datas.size() % batchSize == 0 && submitBatch(datas)) {
						System.out.println("已经成功处理了 [" + (mark - firstRecord) + "] 条记录...");
					}
				}
			} catch (IOException e1) {
				readOk = false;
				e1.printStackTrace();
			}

			// Submit whatever is left over from the last, partial batch.
			boolean flushed = datas.isEmpty() || submitBatch(datas);
			if (readOk && flushed) {
				System.out.println("总记录数：[" + mark + "] ，记录已经全部处理完毕！");
			}
		} finally {
			destroyIndexHandler();
		}
	}

	/**
	 * Submits one batch through {@code doIndexTask} and empties it on success.
	 * On failure the stack trace is printed and the batch is left untouched so
	 * that it can be retried later.
	 *
	 * @param batch records to submit
	 * @return {@code true} if the batch was submitted and cleared
	 */
	private boolean submitBatch(List<String[]> batch) {
		try {
			this.doIndexTask(batch);
			batch.clear();
			return true;
		} catch (SolrServerException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return false;
	}
	
	
	/**
	 * Closes the file reader, if one is open.
	 */
	@Override
	protected void destroyIndexHandler() {
		if (this.csvReader != null) {
			this.csvReader.close();
			this.csvReader = null;
		}
	}
	
	/**
	 * Opens the configured file and maps every configured index column name
	 * to its position among the column names, storing the result through
	 * {@code setIndexIds}.
	 *
	 * <p>When {@code haveHeader} is true the first line is read as the header,
	 * echoed to standard output and used as the column names. Otherwise no
	 * line is consumed and the already configured column names are used.
	 *
	 * <p>Terminates the JVM with exit status 1 if the file cannot be read, or
	 * if index columns are configured but no column names are available.
	 */
	@Override
	protected void initIndexHandler() {
		try {
			this.csvReader = new CsvReader(this.getFileName(),
					this.getLineSeparate(),Charset.forName(this.getFileEncoding()));
			this.csvReader.setTextQualifier(this.getColSurround());
			this.csvReader.setTrimWhitespace(true);
			
			// readHeaders() consumes the first record as the header, so it must
			// only be called when the file really starts with a header line;
			// otherwise the first data row would be lost.
			if (haveHeader) {
				this.csvReader.readHeaders();
				String[] headers = this.csvReader.getHeaders();
				for (String str : headers)
					System.out.print(str + ",");
				System.out.println("");
				this.setColNames(headers);
			}

			int indexCount = this.getIndexColNames().length;
			if (indexCount > 0 && (getColNames() == null || getColNames().length == 0)) {
				System.err.println("你可能没有导入需要建立索引的原始数据，或者没有初始化列名，请检查！");
				System.exit(1);
			}

			int[] ids = new int[indexCount];
			for (int j = 0; j < indexCount; j++) {
				boolean found = false;
				for (int i = 0; i < getColNames().length && !found; i++) {
					if (this.getIndexColNames()[j].equals(getColNames()[i])) {
						ids[j] = i;
						found = true;
					}
				}
				if (!found) {
					// NOTE(review): ids[j] keeps its default value 0 here, so
					// column 0 is used in place of the missing column — confirm
					// whether this should be treated as a fatal configuration error.
					System.out.println("指定的列名 [" + this.getIndexColNames()[j] +
							"] 列名中不存在，请检查配置。");
				}
			}
			this.setIndexIds(ids);
		} catch (IOException e) {
			System.err.println("需要建立索引的文件不存在！" + e.getMessage());
			System.exit(1);
		}
	}
}
